Praxis KJV 작업일지

022726

원본 = KJV Bible_original.txt
새이름 = PKJV.txt
ReformatVerse.py = 혼합된 절을 잘 분리해서 정돈해줌
ReformatChapter.py = 혼합된 절을 분리하는 것에 더해 장(Chapter) 표시까지 삽입해 줌. 결과 = PKJV_Final_Ch.md 와 PKJV_Final_Ch.txt

아래 스크립트를 사용해 작업 완료

import re
import os

def kjv_expert_formatter(input_file, md_output, txt_output, book_titles=None):
    """Reformat a verse-numbered KJV text into Markdown and plain-text files.

    The input is read as UTF-8, every run of whitespace is collapsed to a
    single space, and a line break is forced in front of each
    ``chapter:verse`` reference so that every verse ends up on its own line.
    Known book titles become headings, and a chapter heading is emitted
    whenever the chapter number of a verse differs from the previous one
    (the tracker is reset at every book title).  Text that is neither a
    title nor starts with a ``chapter:verse`` reference is not written out.

    Args:
        input_file: Path of the source text file.
        md_output: Path of the Markdown file to write (entries are joined
            with blank lines; books are ``#`` and chapters ``##`` headings).
        txt_output: Path of the plain-text file to write (entries are joined
            with single newlines; books are ``[title]`` lines and chapters
            ``--- N Chapter ---`` lines).
        book_titles: Optional iterable of book-title strings to recognise.
            When omitted, the built-in (abbreviated) KJV list is used.

    Returns:
        None.  If ``input_file`` does not exist, a message is printed and
        nothing is written.
    """
    # Built-in KJV book-title whitelist.
    kjv_book_titles = [
        "The Old Testament of the King James Version of the Bible",
        "The First Book of Moses: Called Genesis",
        "The Second Book of Moses: Called Exodus",
        # NOTE: abbreviated here -- the remaining titles are to be kept
        # identical to the previous list (or passed in via ``book_titles``).
        "The Revelation of Saint John the Divine"
    ]
    if book_titles is not None:
        kjv_book_titles = list(book_titles)

    if not os.path.exists(input_file):
        print(f"❌ 원본 파일을 찾을 수 없습니다: {input_file}")
        return

    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # 1. Pre-processing: unwrap fragmented sentences by collapsing every
    #    whitespace run (including line breaks) into one space.
    content = re.sub(r'\s+', ' ', content)

    # 2. Force a line break in front of every chapter:verse reference.
    content = re.sub(r'(\d+:\d+)', r'\n\1', content)

    # 3. Mark the book titles.  Titles are whitespace-normalised (the text
    #    was just unwrapped the same way), de-duplicated and tried longest
    #    first in a single pass, so a title that is contained in a longer
    #    title can never be wrapped a second time inside it.
    normalized_titles = (" ".join(title.split()) for title in kjv_book_titles)
    unique_titles = sorted(
        dict.fromkeys(title for title in normalized_titles if title),
        key=len,
        reverse=True,
    )
    if unique_titles:
        title_pattern = re.compile(
            "|".join(re.escape(title) for title in unique_titles)
        )
        content = title_pattern.sub(
            lambda m: f"\n[KJV_TITLE]{m.group(0)}[/KJV_TITLE]\n", content
        )

    # Compiled once here instead of on every loop iteration.
    verse_ref = re.compile(r'^(\d+):(\d+)')
    verse_prefix = re.compile(r'^(\d+:\d+)\s*')

    md_final = []
    txt_final = []

    current_chapter = None  # chapter number currently being tracked

    for raw_line in content.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        # Book title line.
        if "[KJV_TITLE]" in line:
            clean_title = line.replace("[KJV_TITLE]", "").replace("[/KJV_TITLE]", "")
            md_final.append(f"\n# {clean_title}\n")
            txt_final.append(f"\n[{clean_title}]\n")
            # A new book starts, so the next verse must open a new chapter
            # even if its chapter number equals the previous book's last one.
            current_chapter = None
            continue

        # Verse line; anything that does not start with chapter:verse is skipped.
        match = verse_ref.match(line)
        if match is None:
            continue

        chapter_num = match.group(1)

        # Chapter number changed (e.g. from 1 to 2): emit a chapter heading.
        if chapter_num != current_chapter:
            current_chapter = chapter_num
            # Markdown gets an H2 heading for the chapter.
            md_final.append(f"\n## {current_chapter} Chapter\n")
            # The text file gets the chapter between separator dashes.
            txt_final.append(f"\n--- {current_chapter} Chapter ---\n")

        # Normalise to exactly one space between the reference and the text.
        formatted_line = verse_prefix.sub(r'\1 ', line)
        md_final.append(formatted_line)
        txt_final.append(formatted_line)

    # 4. Save the results.
    with open(md_output, 'w', encoding='utf-8') as f:
        f.write("\n\n".join(md_final))

    with open(txt_output, 'w', encoding='utf-8') as f:
        f.write("\n".join(txt_final))

    print("✅ 장(Chapter) 구분 포함 정밀 편집 완료!")

if __name__ == "__main__":
    # Folder that holds the source text and receives both output files.
    BASE_PATH = r"D:\Kee_Drive\SynologyDrive\KeeDian\03_Knowledge\00_Praxis Bible\PraxisKJV"
    # Input text, Markdown output and plain-text output, all inside BASE_PATH.
    IN_FILE, MD_OUT, TXT_OUT = (
        os.path.join(BASE_PATH, file_name)
        for file_name in ("PKJV.txt", "PKJV_Final_Ch.md", "PKJV_Final_Ch.txt")
    )
    kjv_expert_formatter(IN_FILE, MD_OUT, TXT_OUT)

🛠 통합된 핵심 기능 분석

책 제목 및 절 분리 (Unwrap & Split): 먼저 텍스트 전체의 줄바꿈과 연속된 공백을 공백 한 칸으로 바꿔 파편화된 문장들을 하나로 합친 뒤, 정규표현식으로 문장 중간에 숨어 있는 장:절 패턴(예: 1:31)을 찾아 그 앞에서 강제로 줄을 바꿉니다. 이 과정에서 이전 코드에서 다뤘던 '절끼리 붙어 있는 문제'도 함께 해결됩니다.
KJV 전용 제목 보호 (White-list Matching): 사용자가 제공한 KJV 고유의 긴 제목 리스트를 기준으로 텍스트를 검색합니다. 본문 내용 중 제목과 일치하는 텍스트가 발견되면 이를 별도의 제목 마커([KJV_TITLE])로 감싸서 본문과 격리하므로, 본문 일부가 제목(#)으로 오해받는 일을 원천 차단합니다.
자동 장(Chapter) 구분 삽입: 각 라인을 처리할 때마다 절 번호의 앞 숫자(장 번호)를 추적합니다. 숫자가 바뀌는 순간(예: 1:31 처리 후 2:1 발견 시)을 포착하여 그 사이에 ## {장번호} Chapter라는 마크다운 헤더를 자동으로 삽입합니다.

📝 결과물 구조 예시

이 코드를 실행하면 내부적으로 다음과 같은 흐름으로 데이터가 정렬됩니다:

# The First Book of Moses: Called Genesis (책 제목 감지)
## 1 Chapter (첫 장 시작 감지)
1:1 In the beginning... (절 단위 정렬)
...
1:31 And God saw... (1장의 마지막 절)
## 2 Chapter (숫자 변화 감지 후 자동 삽입)
2:1 Thus the heavens... (2장의 시작)